4 Laboratory sample processing

4.1 DNA extraction

4.1.1 General statistics

read_tsv("data/extraction.tsv") %>%
   summarise(
    max= max(extraction_total, na.rm = TRUE),
    min= min(extraction_total, na.rm = TRUE),
    mean= mean(extraction_total, na.rm = TRUE),
    sd = sd(extraction_total, na.rm = TRUE)
  ) %>%
  tt()
tinytable_c3ga4t6kqu85jkydn1l0
max min mean sd
7110 0 365.683 677.4012

4.1.2 Sample types

4.1.2.1 Data distribution

left_join(read_tsv("data/extraction.tsv"),
          read_tsv("data/sample.tsv"),
          by="sample_id") %>% 
  filter(sample_type %in% c("Faecal", "Anal/cloacal swab"))  %>%
  ggplot(., aes(x=extraction_total, fill=sample_type, color=sample_type)) + 
    geom_density() + 
    scale_color_manual(values = c("#bdca50", "#AA3377")) +   
    scale_fill_manual(values = c("#bdca5050", "#AA337750")) +
    labs(y="Density",x="DNA yield", fill="Sample type", color="Sample type") +
    theme_classic()

4.1.2.2 Comparison

left_join(read_tsv("data/extraction.tsv"),
          read_tsv("data/sample.tsv"),
          by="sample_id") %>% 
  filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
  mutate(extraction_total = ifelse(extraction_total == 0, 0.00001, extraction_total)) %>%
  mutate(extraction_total = log(extraction_total)) %>% 
  select(extraction_total,sample_type) %>%
  mutate(sample_type = factor(sample_type)) %>%
  wilcox.test(extraction_total ~ sample_type,data=.)

    Wilcoxon rank sum test with continuity correction

data:  extraction_total by sample_type
W = 335473, p-value < 2.2e-16
alternative hypothesis: true location shift is not equal to 0
left_join(read_tsv("data/extraction.tsv"),
          read_tsv("data/sample.tsv"),
          by="sample_id") %>% 
  filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
  group_by(sample_type)  %>%
  summarise(
    mean= mean(extraction_total, na.rm = TRUE),
    sd = sd(extraction_total, na.rm = TRUE)) %>%
  tt()
tinytable_zdrcapkepjr6ge5edwmy
sample_type mean sd
Anal/cloacal swab 170.2285 362.7389
Faecal 316.6666 423.6434

4.1.3 Taxonomy

4.1.3.1 Data distribution

left_join(read_tsv("data/extraction.tsv"),
          read_tsv("data/sample.tsv"),
          by="sample_id") %>% 
  filter(sample_type %in% c("Faecal", "Anal/cloacal swab"))  %>%
  filter(!is.na(tax_group))  %>%
  ggplot(., aes(x=extraction_total, fill=tax_group, color=tax_group)) + 
    geom_density() +
    scale_color_manual(values = c("#228833","#66CCEE","#CCBB44","#4477AA","#EE6677")) +
    scale_fill_manual(values = c("#22883350","#66CCEE50","#CCBB4450","#4477AA50","#EE667750")) +
    labs(y="Density",x="DNA yield", fill="Sample type", color="Sample type") +
    theme_classic()

left_join(read_tsv("data/extraction.tsv"),
          read_tsv("data/sample.tsv"),
          by="sample_id") %>% 
  filter(sample_type %in% c("Faecal", "Anal/cloacal swab"))  %>%
  ggplot(., aes(y=extraction_total,x=tax_group,color=tax_group)) + 
    geom_jitter() +
    scale_color_manual(values = c("#22883380","#66CCEE80","#CCBB4480","#4477AA80","#EE667780")) +
    labs(y="DNA yield",x="Taxonomic group", color="Sample type") +
    theme_classic()

4.1.3.2 Comparison

left_join(read_tsv("data/extraction.tsv"),
          read_tsv("data/sample.tsv"),
          by="sample_id") %>% 
  filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
  mutate(extraction_total = ifelse(extraction_total == 0, 0.00001, extraction_total)) %>%
  mutate(extraction_total = log(extraction_total)) %>% 
  select(extraction_total,tax_group) %>%
  mutate(sample_type = factor(tax_group)) %>%
  kruskal.test(extraction_total ~ tax_group,data=.)

    Kruskal-Wallis rank sum test

data:  extraction_total by tax_group
Kruskal-Wallis chi-squared = 946.6, df = 4, p-value < 2.2e-16
left_join(read_tsv("data/extraction.tsv"),
          read_tsv("data/sample.tsv"),
          by="sample_id") %>% 
  filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
  group_by(tax_group)  %>%
  summarise(
    mean= mean(extraction_total, na.rm = TRUE),
    sd = sd(extraction_total, na.rm = TRUE)) %>%
  tt()
tinytable_envw3hjj8kcojszncoek
tax_group mean sd
Amphibians 233.35078 358.90219
Bats 59.60574 92.00357
Birds 74.43550 262.34517
Mammals 463.93202 471.41487
Reptiles 394.50619 437.86229

4.2 Sequencing library preparation

4.2.1 Overall

left_join(read_tsv("data/library.tsv"),
          read_tsv("data/sample.tsv"),
          by="sample_id") %>% 
  filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
  filter(library_PCR_cycles_required > 0) %>%
   summarise(
    max= max(library_PCR_cycles_required, na.rm = TRUE),
    min= min(library_PCR_cycles_required, na.rm = TRUE),
    mean= mean(library_PCR_cycles_required, na.rm = TRUE),
    sd = sd(library_PCR_cycles_required, na.rm = TRUE)) %>%
  tt()
tinytable_dzoeb97ab3a3fhe0f8p1
max min mean sd
33 2 9.150046 4.390537
left_join(read_tsv("data/library.tsv"),
          read_tsv("data/sample.tsv"),
          by="sample_id") %>% 
  filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
  lm(library_PCR_cycles_required ~ library_input_dna, data = .)  %>%
  summary()

Call:
lm(formula = library_PCR_cycles_required ~ library_input_dna, 
    data = .)

Residuals:
     Min       1Q   Median       3Q      Max 
-11.5911  -2.4314  -0.7679   1.8114  21.5487 

Coefficients:
                    Estimate Std. Error t value Pr(>|t|)    
(Intercept)       11.5911019  0.1103983  104.99   <2e-16 ***
library_input_dna -0.0284251  0.0009006  -31.56   <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 3.758 on 2443 degrees of freedom
  (11 observations deleted due to missingness)
Multiple R-squared:  0.2897,    Adjusted R-squared:  0.2894 
F-statistic: 996.2 on 1 and 2443 DF,  p-value: < 2.2e-16

4.2.2 Sample types

left_join(read_tsv("data/library.tsv"),
          read_tsv("data/sample.tsv"),
          by="sample_id") %>% 
  filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
  filter(library_input_dna <= 200) %>%
    ggplot(., aes(y=library_PCR_cycles_required, x=library_input_dna, color=sample_type, group=sample_type)) +
        geom_point(alpha=0.5) +
        stat_smooth(method = "lm", formula = y ~ x, geom = "smooth") +
        scale_color_manual(values = c("#bdca50", "#AA3377")) +
        theme_classic() +
        labs(y="Required number of cycles",x="Amount of inputted DNA (ng)", color="Sample type") +
        theme_classic()

4.2.2.1 Comparisons

Relationship between the amount of inputted DNA (ng) and the required number of cycles.

left_join(read_tsv("data/library.tsv"),
          read_tsv("data/sample.tsv"),
          by="sample_id") %>% 
  filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
  filter(library_PCR_cycles_required>0 & library_input_dna>0) %>% 
  lme(library_PCR_cycles_required ~ library_input_dna, random=~1 | sample_type, data = .) %>%
  broom.mixed::tidy() %>%
  tt()
tinytable_v9w6xkaesx5h5cfqkcg0
effect group term estimate std.error df statistic p.value
fixed NA (Intercept) 11.87417166 1.1716203608 2330 10.13483 1.187767e-23
fixed NA library_input_dna -0.02501844 0.0008256453 2330 -30.30168 2.507418e-170
ran_pars sample_type sd_(Intercept) 1.64945139 NA NA NA NA
ran_pars Residual sd_Observation 3.30432885 NA NA NA NA

Comparison between slopes.

left_join(read_tsv("data/library.tsv"),
          read_tsv("data/sample.tsv"),
          by="sample_id") %>% 
  filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
  lm(library_PCR_cycles_required ~ library_input_dna * sample_type, data = .)  %>%
  broom::tidy() %>%
  tt()
tinytable_f2cxtgiseqorwine5a7z
term estimate std.error statistic p.value
(Intercept) 13.3047246163 0.187031107 71.13642643 0.000000e+00
library_input_dna -0.0260524410 0.001812871 -14.37082274 5.140847e-45
sample_typeFaecal -2.4762116244 0.227612093 -10.87908638 5.960119e-27
library_input_dna:sample_typeFaecal 0.0001282468 0.002078874 0.06169052 9.508143e-01

4.2.3 Taxonomy

left_join(read_tsv("data/library.tsv"),
          read_tsv("data/sample.tsv"),
          by="sample_id") %>% 
  filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
  filter(library_input_dna <= 200) %>%
    ggplot(., aes(y=library_PCR_cycles_required, x=library_input_dna, color=tax_group, group=tax_group)) +
        geom_point(alpha=0.5) +
        stat_smooth(aes(fill = tax_group), method = "lm", formula = y ~ x, geom = "smooth") +
        scale_color_manual(values = c("#22883380","#66CCEE80","#CCBB4480","#4477AA80","#EE667780")) +
        scale_fill_manual(values = c("#22883350","#66CCEE50","#CCBB4450","#4477AA50","#EE667750")) +
        theme_classic() +
        labs(y="Required number of cycles",x="Amount of inputted DNA (ng)", color="Sample type", fill="Sample type") +
        theme_classic()